
* Create fertility rates at the month-level and at the parity-level

* Re-save the birthplace code lookup sorted on Birthplace, so that it can be
* used as the "using" file of the sorted merge on Birthplace further down.
use "$datatemp/birthplace_codes.dta", clear
sort Birthplace
save "$datatemp/birthplace_codes.dta", replace

* Both workbooks below span well over 100,000 rows; allow large .xlsx imports.
set excelxlsxlargefile on

* Workbook 1: sheet "numbirth", cells E9:J211584, first row holds variable names.
* NOTE(review): judging by the file name this is the first-child extract - confirm.
* Rows with no birth year are discarded and the data are left sorted on the
* place/year/month merge keys.
import excel using "$data/birthplace_year1child_month1child.xlsx", sheet("numbirth") cellrange(E9:J211584) firstrow clear
keep if Birthyear != .
sort Birthplace Birthyear Birthmonth
save "$datatemp/birthplace_year1child_month1child_2010.dta", replace

* Workbook 2: sheet "numbirth", cells F8:K150158, handled the same way.
* NOTE(review): judging by the file name these are all-parity birth counts - confirm.
import excel using "$data/birthplace_year_month_numbirth_0210.xlsx", sheet("numbirth") cellrange(F8:K150158) firstrow clear
keep if Birthyear != .
sort Birthplace Birthyear Birthmonth
save "$datatemp/birthplace_year_month_numbirth_0210.dta", replace

* Build the lookup of the number of women aged 15 to 44:
* stack the two source files into a single dataset.
use "$datatemp/numwomen_1544_89cens_official.dta", clear
append using "$datatemp/gfr_90_repl.dta"

* The birth data are keyed on Birthyear, so rename Year to match, then leave
* the file sorted on the keys used by the sorted merge that consumes it.
rename (Year) (Birthyear)
sort Birthplace_code Birthyear
save "$datatemp/numwomen_1544_all.dta", replace


* Step 1: join the two imported extracts on place, year and month,
* retaining only the cells that appear in both.
use "$datatemp/birthplace_year1child_month1child_2010.dta", clear
sort Birthplace Birthyear Birthmonth
merge (Birthplace Birthyear Birthmonth) using "$datatemp/birthplace_year_month_numbirth_0210.dta"
drop if _merge != 3
drop _merge

* Step 2: attach the birthplace codes, again keeping matched rows only.
sort Birthplace
merge (Birthplace) using "$datatemp/birthplace_codes.dta"
drop if _merge != 3
drop _merge

* Rows with month 0 or year 0 are removed.
* NOTE(review): presumably these are total / not-stated rows - confirm against the workbook.
drop if Birthmonth == 0 | Birthyear == 0

* Step 3: attach the number of women aged 15-44 by birthplace code and year.
* _merge is deliberately left in the saved file; the code that reloads this
* file drops it.
sort Birthplace_code Birthyear
merge (Birthplace_code Birthyear) using "$datatemp/numwomen_1544_all.dta"
drop if _merge != 3
save "$datatemp/numbirth_census0210_month.dta", replace

* Aggregate the month-level data (still in memory) to one row per
* birthplace code and year: birth counts are summed over months, loc takes
* its minimum, and numwomen_1544 is averaged (it was merged in at the
* code-by-year level, so it does not vary across months within a cell).
collapse (sum) Numbirth1st_02 Numbirth1st_10 numbirth_2010 numbirth_2002 numbirth (min) loc (mean) numwomen_1544, by(Birthplace_code Birthyear)

* Only these variables are saved. The former
*   gen numbirth_2ndpl = numbirth_2010-Numbirth1st_10
* was removed here: it was not in the keep list, so it was discarded
* immediately and had no effect on the saved file.
keep numbirth_2002 numbirth_2010 Numbirth1st_10 numwomen_1544 loc Birthplace_code Birthyear

* Re-key on Year and sort on the keys used by the later sorted merge.
rename Birthyear Year
sort Birthplace_code Year
save "$datatemp/births_0210_year.dta", replace

** Share of births still observed in each census, by birthplace code and year
use "$datatemp/rosstat_gfr_oblast_year.dta", clear
append using "$datatemp/gfr_90_repl.dta"

* For 1990-1993 the birth count is taken from numbirth (overwriting numbir);
* numbirth itself is not needed afterwards.
replace numbir = numbirth if inrange(Year, 1990, 1993)
drop numbirth

* Bring in the yearly census birth counts; only matched code-year cells are kept.
sort Birthplace_code Year
merge (Birthplace_code Year) using "$datatemp/births_0210_year.dta"
keep if _merge==3

* present02_per / present10_per = census count (2002 / 2010) divided by numbir.
foreach wave in 02 10 {
    generate present`wave'_per = numbirth_20`wave' / numbir
}

keep present02_per present10_per Birthplace_code Year
sort Birthplace_code Year
save "$datatemp/absent_per.dta", replace

* Adjust fertility rates using the probability that you are still present
* Input : month-level births with women counts (numbirth_census0210_month.dta)
*         and the yearly presence shares (absent_per.dta).
* Output: gfr_adjusted.dta with adjusted birth counts and fertility rates.
use "$datatemp/numbirth_census0210_month.dta", clear
drop _merge
rename Birthyear Year
sort Birthplace_code Year
merge (Birthplace_code Year) using "$datatemp/absent_per.dta"
keep if _merge==3
drop _merge

* Scale the observed counts up by the share still present in the census.
gen numbirth_2002_adj = numbirth_2002/present02_per
gen numbirth_2010_adj = numbirth_2010/present10_per
* Second-and-higher parity births = all births minus first births (2010 census).
gen numbirth_2ndpl = numbirth_2010-Numbirth1st_10
gen numbirth_2ndpl_adj = numbirth_2ndpl/present10_per
gen Numbirth1st_10_adj = Numbirth1st_10/present10_per

* Rates per 1,000 women aged 15-44; monthly counts are multiplied by 12 to
* put them on an annual scale.
gen gfr_2002_adj = numbirth_2002_adj*1000*12/numwomen_1544
gen gfr_2nd_adj = numbirth_2ndpl_adj*1000*12/numwomen_1544
gen gfr_1st_adj = Numbirth1st_10_adj*1000*12/numwomen_1544

* Spread the 1980 and 1979 women counts to every row of the same birthplace.
gen w1 = numwomen_1544 if Year==1980
bysort Birthplace_code: egen numwomen_80 = max(w1)
gen w2 = numwomen_1544 if Year==1979
* Fix: this used max(w1), which filled numwomen_79 with the 1980 count.
bysort Birthplace_code: egen numwomen_79 = max(w2)
drop Birthplace_rus republic Numbirth1st_02 Numbirth1st_10 w1 w2 present02_per present10_per numbirth_2010 numbirth_2002 numbirth_2ndpl numwomen_80 numbirth

label variable Year "Year of observation"
label variable Birthplace "Name of an oblast"
label variable Birthmonth "Month of observation"
* Labels for Numbirth1st_10, numbirth_2010 and numbirth_2002 were removed:
* those variables are dropped above. With variable abbreviation on, the old
* label commands silently resolved to the *_adj variables (and were then
* overwritten below); with "set varabbrev off" they stopped the do-file.
label variable Birthplace_code "Unique numerical code for each oblast"
label variable loc "equals to 1 for an early beneficiary and 2 for a late beneficiary"
label variable numwomen_1544 "Number of women age 15 to 44"
* label variable numbirth "Number of births from official Rosstat data from 1990"
label variable numbirth_2002_adj "Number of births in the 2002 census adjusted for migration and mortality"
label variable numbirth_2010_adj "Number of births in the 2010 census adjusted for migration and mortality"
label variable numbirth_2ndpl_adj "Number of second births in the 2010 census adjusted for migration and mortality"
label variable Numbirth1st_10_adj "Number of first births in the 2010 census adjusted for migration and mortality"
label variable gfr_2002_adj "General Fertility Rate using the 2002 census adjusted for migration and mortality"
label variable gfr_2nd_adj "Second and higher parity birth fertility rate using the 2010 census adjusted for migration and mortality"
label variable gfr_1st_adj "First birth fertility rate using the 2010 census adjusted for migration and mortality"
label variable numwomen_79 "Number of women age 15 to 44 in 1979"

save "$datatemp/gfr_adjusted.dta", replace


